/*
BRINGING THE EMPLOYER-EMPLOYEE DATA
*/
capture program drop correspondences 
capture program drop e_e_data

********************** 
***  INPUT FILES   ***  
**********************
global analysis_data ""processed_data\analysis_data.dta""
global corresp ""raw_data\2018-08-14 Correspondencias de n° de identificación.xlsx""
global revec_corresp ""raw_data\revec_corresp.dta""
global MNCs ""processed_data\MNC_sample.dta""

******
*CORRESPONDENCES OF FIRM IDs
******
prog correspondences
*FIRMS EXPERIENCING THE EVENTS + MNCs
preserve
use $analysis_data, clear
collapse event, by(ID)
drop if event==.
destring ID, replace
format ID %16.0g
tempfile IDs
save `IDs', replace
restore
use $MNCs, clear
keep ID 
gen MNC_firm=1
destring ID, replace
format ID %16.0g
append using `IDs'
save temp/events.dta, replace

*correspondences
import excel $corresp, sheet("Válidas") firstrow clear
keep Cédulaanteriorfinal Cédulaactual
rename Cédulaanteriorfinal ID 
rename Cédulaactual cedula_new
drop if cedula_new==.
destring ID cedula_new, replace    
format ID cedula_new %24.0g
compress
save temp/correspond.dta, replace

*revec ids
use $revec_corresp, clear
keep ID ID_g
destring ID ID_g , replace
format ID ID_g %24.0g
compress
save temp/corr2.dta, replace

end

******
*MERGING EVENTS WITH EMPLOYER-EMPLOYEE DATA	
******
prog e_e_data
*importing data per year
local first_year=2007
local last_year=2017
tempfile temp_

forvalue k=`first_year'/`last_year'{
display "Year `k'"
quiet{
if $BCCR == 1 {
	import delimited "Y:/Datasets/Raw_Data/Employer_Employee/Sicere`k'.csv", ///
encoding(UTF-8) clear
}

if $BCCR == 0 {
	import delimited "C:\Users\Brayan Segura S\Documents\GitHub\Suppliers_Alfaro-Urena_Manelici_Vasquez\raw_data\Sicere`k'.csv", ///
encoding(UTF-8) clear
tostring sex, replac
}

*renaming
rename *, lower
keep numpatron num_identi tip_ocupac salario f_nac sex year month codpaisnac
gen male=(sex=="1")
drop sex
destring salario numpatron num_identi , replace force 
format numpatron num_identi %16.0g
destring tip_ocupac, replace force
drop if salario==0

* DUPLICATES
ds *
local todos `r(varlist)'
sort `todos' , stable
by `todos': gen dup=cond(_N==1,0,_n)
drop if dup>1
drop dup
rename numpatron ID 
compress 

* CORRESPONDENCES
merge m:1 ID using temp/correspond.dta
drop if _m==2
replace ID=cedula_new if _m==3
drop cedula_new _m
merge m:1 ID using  temp/corr2.dta
drop if _m==2
replace ID=ID_g if _m==3
drop _m
compress

* REPEATED IN SAME FIRM
gen temp_salar=-sala
sort year month ID num_iden temp_salar, stable 
drop temp_salar
by year month ID num_iden: gen dup=cond(_N==1,0,_n)
drop if dup>0
drop dup
gen temp_months=1
rename salar temp_sala
* ADDING TO YEARLY LEVEL
bys num_ide ID year: egen months=total(temp_months)
bys num_ide ID year: egen salario=total(temp_sala)
gen temp_salar=-temp_sala
sort num_ide ID year temp_salar, stable 
drop temp_salar
*erasing duplicates
by num_ide ID year: gen dup=cond(_N==1,0,_n)
drop if dup>1
keep months salar num_ide ID tip_ocupac year f_nac codpaisnac
replace codpais=188 if codpais==0
*saving file for that year
tempfile file_`k'
save `file_`k'', replace
}
}

*appending years 
quiet{
clear
forvalue k=`first_year'/`last_year'{
append using `file_`k''
erase `file_`k''
compress
}
}

*saving final file
compress
save temp/ee_full, replace

end

******************************************************************************** 
*** CODE TO RUN
********************************************************************************
quiet correspondences
quiet e_e_data
